Krushang Shah 06/18/2020

Setup

library(tidyverse)
library(scales)
library(modelr)
library(lubridate)
library(plotly)
library(broom)

# Make theme_bw() the default ggplot2 theme for all plots below.
theme_set(theme_bw())

# Set the repr.plot.* options to a 4 x 3 figure size.
options(
  repr.plot.width = 4,
  repr.plot.height = 3
)

# Echo chunk source code in the knitted output.
knitr::opts_chunk$set(echo = TRUE)

Load Data

# Load saved objects from disk. The code below uses `model`, `holidays`,
# `trips_per_day` and `trips_per_day_2015` without creating them, so they are
# presumably supplied by these two files -- confirm if either file changes.
load(".RData")
load("trips_2015.RData")

Modify 2015 data to feed into the model

# Flag holidays: join the holiday list on the `ymd` column, then encode the
# flag as 0 where the join left `holiday` as NA and 1 otherwise.
trips_per_day_2015 <- trips_per_day_2015 %>%
  left_join(holidays, by = "ymd") %>%
  mutate(holiday = ifelse(is.na(holiday), 0, 1))

# Add the day-of-week factor and a 0/1 weekend flag.
# The weekend flag is derived from the numeric day of week (with
# week_start = 1, Monday = 1 ... Saturday = 6, Sunday = 7) instead of comparing
# against the English strings "Saturday"/"Sunday": weekdays() returns names in
# the current locale, so the string comparison silently yields 0 for every row
# when the session is not running in an English locale.
# NOTE(review): the `weekdays` factor itself is still locale-dependent; it is
# left unchanged because its levels presumably have to match the ones `model`
# was fitted with -- confirm.
trips_per_day_2015 <- trips_per_day_2015 %>%
  mutate(
    weekdays = as.factor(weekdays(ymd)),
    isWeekend = ifelse(lubridate::wday(ymd, week_start = 1) >= 6, 1, 0)
  )

Model description

# Print the tidy() summary of the fitted `model` object.
model %>% tidy()

Test on 2015 data

# Evaluate `model` on the 2015 data: RMSE first, then R-squared.
model %>% rmse(trips_per_day_2015)
## [1] 8007.7
model %>% rsquare(trips_per_day_2015)
## [1] 0.7335286

# Attach the model predictions (used below as column `pred`) and label every
# row with the year it belongs to.
trips_per_day_2015 <- mutate(
  add_predictions(trips_per_day_2015, model),
  split = "2015"
)

plot_data <- trips_per_day_2015

# Predicted vs. actual daily trips. The line layer plots `pred` against
# `pred`, so it is the y = x reference line.
(
  ggplot(plot_data, aes(pred, num_trips)) +
    geom_point(aes(colour = split)) +
    geom_line(aes(y = pred)) +
    labs(
      x = "Predicted number of daily trips",
      y = "Actual number of daily trips"
    ) +
    scale_y_continuous()
) %>%
  ggplotly()

# Actual trips (points) and predicted trips (line) over time.
(
  ggplot(plot_data, aes(ymd, num_trips)) +
    geom_point(aes(colour = split)) +
    geom_line(aes(y = pred)) +
    labs(x = "Day of the year", y = "Daily trips") +
    scale_y_continuous()
) %>%
  ggplotly()

Comparison between 2014 and 2015

# Fit metrics of `model` on the `trips_per_day` data (labelled "2014" below).
model %>% rmse(trips_per_day)
## [1] 3471.606
model %>% rsquare(trips_per_day)
## [1] 0.8851454

# The same metrics on the 2015 data, repeated here for side-by-side reading.
model %>% rmse(trips_per_day_2015)
## [1] 8007.7
model %>% rsquare(trips_per_day_2015)
## [1] 0.7335286

# Attach the model predictions (used below as column `pred`) and label every
# row with the year it belongs to.
trips_per_day <- mutate(
  add_predictions(trips_per_day, model),
  split = "2014"
)

# Stack both data sets so each plot shows them together, coloured by `split`.
plot_data <- bind_rows(trips_per_day, trips_per_day_2015)

# Predicted vs. actual daily trips. The line layer plots `pred` against
# `pred`, so it is the y = x reference line.
(
  ggplot(plot_data, aes(pred, num_trips)) +
    geom_point(aes(colour = split)) +
    geom_line(aes(y = pred)) +
    labs(
      x = "Predicted number of daily trips",
      y = "Actual number of daily trips"
    ) +
    scale_y_continuous()
) %>%
  ggplotly()

# Actual trips (points) and predicted trips (line) over time.
(
  ggplot(plot_data, aes(ymd, num_trips)) +
    geom_point(aes(colour = split)) +
    geom_line(aes(y = pred)) +
    labs(x = "Day of the year", y = "Daily trips") +
    scale_y_continuous()
) %>%
  ggplotly()